// Clean the long-format PSID file produced by the renaming step.
use "$temp/psid_long", clear
order uniqid year
keep if year >= 1970 // earlier years only have bucketed wages, which are not usable

*****get state fips codes
// Step 1: PSID state code -> full state name. Only matched rows are kept.
merge m:1 state using "$data/Crosswalks/psid_state_codes", keep(match) nogenerate
drop state

// Step 2: full state name -> FIPS code. No keep() here, so unmatched rows
// from either side remain in memory.
merge m:1 state_full using "$data/Crosswalks/state_fips_crosswalk", nogenerate
// NOTE(review): `state` was already dropped above, so this drop only succeeds
// if the FIPS crosswalk itself carries a `state` variable -- confirm.
drop state_full state

*****deal with top-coded earnings
// Convert the top-code spreadsheet into a year-keyed .dta lookup. The main
// data are set aside with preserve/restore while the spreadsheet is in memory.
preserve
import excel "$data/PSID/top codes.xlsx", firstrow clear
ren Y year
ren Top topcode
destring topcode, replace
ren D flag
save "$data/PSID/top_codes", replace
restore

// keep(master match): spreadsheet years with no PSID observations must not
// create empty rows in the panel.
merge m:1 year using "$data/PSID/top_codes", keep(master match) nogen

// Stata evaluates a missing value as TRUE in a logical expression, so a bare
// `& flag` would also fire on blank flag cells (and `& !flag` would never fire
// on them). Test the flag explicitly instead.
// NOTE(review): a blank flag is treated here as "not flagged" -- confirm
// against the spreadsheet's coding.
// The !missing(topcode) guard stops missing wages from "matching" a missing
// top code in years that have no spreadsheet entry.
drop if wages == topcode & !missing(topcode) & flag != 0 & !missing(flag)  //flagged sample that we don't want
replace wages = wages*1.5 if wages == topcode & !missing(topcode) & (flag == 0 | missing(flag))  //actual topcode: scale by 1.5
drop topcode flag

*****shift earnings/hours variables to reflect year of interview
// Each row's wages/hours are replaced by a value built from the NEXT row of
// the same person:
//   year <= 1996 (yearly phase): take the next row's value;
//   year >= 1997 (every-other-year phase): average the next and current rows.
// The next row must be exactly one wave ahead (year+1 in the yearly phase,
// year+2 afterwards). Without that check, a person who skipped an interview
// would be assigned a value from the wrong year. Rows with no valid next wave
// end up with missing wages/hours.
sort uniqid year

gen wage_temp = .  //temp variable to hold shifted wage values
gen hours_temp = . //temp variable to hold shifted hours values

// yearly phase: use the next year's measurement
replace wage_temp  = wages[_n+1] if uniqid == uniqid[_n+1] & year <= 1996 & year[_n+1] == year + 1
replace hours_temp = hours[_n+1] if uniqid == uniqid[_n+1] & year <= 1996 & year[_n+1] == year + 1

// every-other-year phase: midpoint of the current and next wave's measurement
replace wage_temp  = (1/2)*wages[_n+1] + (1/2)*wages if uniqid == uniqid[_n+1] & year >= 1997 & year[_n+1] == year + 2
replace hours_temp = (1/2)*hours[_n+1] + (1/2)*hours if uniqid == uniqid[_n+1] & year >= 1997 & year[_n+1] == year + 2

//update
replace wages = wage_temp
replace hours = hours_temp
drop wage_temp hours_temp

*****deflate to 2012 dollars
// Build a year-level deflator lookup from the delimited source file, holding
// the panel aside while the lookup is in memory.
preserve
	import delimited "$data/GDP/gdp_pce_deflator", varnames(1) clear
	generate year = substr(date, -4, .) // year = last four characters of the date string
	destring year, replace
	keep year deflator
	save "$data/GDP/gdp_pce_deflator", replace
restore

// Attach the deflator by year; only rows matched in both files are kept.
merge m:1 year using "$data/GDP/gdp_pce_deflator", keep(3) nogenerate
replace deflator = deflator/100 // rescale the index
replace wages = wages/deflator  // wages in deflated terms

local deflator_1968 = 19.152 // 1968 deflator value, used later for the sample restrictions
sort uniqid year

*****create "birth" state -- for first go, state at 17
// birth_state is the person's state FIPS code at age 17. If no age-17 row
// exists (e.g. a skipped interview), fall back in turn to the state at age
// 16, 15, then 14. A value already found is never overwritten.
gen birth_state = .
forvalues back = 0/3 {
	gen cand = statefips if age == 17 - `back'
	bys uniqid: egen cand_state = max(cand) // spread the candidate to all of the person's rows
	replace birth_state = cand_state if birth_state == . & cand_state != .
	drop cand cand_state
	if `back' == 0 {
		count if birth_state == . // rows still missing after using age 17 only
	}
}
count if birth_state == . // rows still missing after the fallback ages

*****sample restrictions
// wages were divided by (deflator/100) above, i.e. they are in base-year
// dollars. To express them in 1968 dollars, multiply by (1968 deflator / 100).
// Multiplying by 100/deflator_1968 would inflate wages about fivefold and make
// the earnings thresholds below far too lenient.
// Assumes the deflator series equals 100 in its base year, consistent with the
// /100 normalization -- confirm against the source file.
gen wages_1968 = wages * (`deflator_1968'/100)
drop if relate != 1 & year<=1982 //restrict to household heads (code 1 through 1982)
drop if relate != 10 & year>=1983 //restrict to household heads (code 10 from 1983)
keep if age>=18 & age<=72 //age range
drop if age>=36 & (hours<520 | wages_1968<1500) //adequate work for older heads
drop if age<36 & (hours<260 | wages_1968<1000) //adequate work for younger heads
drop if hours>5820 //too many hours; missing hours compare as > any number, so they are dropped here too
sort uniqid year

//fill in education for 1970-1974
// Where educ is coded 0 in these years, copy it from the same person's next
// row. The loop runs from 1974 back to 1970 so that a value pulled into a
// later year can then be passed on to the earlier ones.
// Relies on the data being sorted by uniqid year.
forvalues yr = 1974(-1)1970 {
	replace educ = educ[_n+1] if educ == 0 & year == `yr' & uniqid == uniqid[_n+1]
}

//winsorize wages at 99th percentile (top tail only)
su wages, d
//replace wages = `r(p99)' * 1.5 if wages>`r(p99)'
// Missing values compare as larger than any number in Stata, so without the
// !missing() guard every missing wage would be overwritten with the 99th
// percentile.
replace wages = `r(p99)' if wages>`r(p99)' & !missing(wages)
su wages, d



drop if educ == 0 | educ == 98 | educ == 99 //kill bad education values
// College indicator: 1 if educ >= 16, 0 otherwise. It is left missing when
// educ is missing; a bare (educ>=16) would code missing educ as 1.
gen coll = (educ>=16) if !missing(educ)
save "$temp/psid_long_cleaned", replace
//end of dofile

